In [1]:
import graphlab
In [2]:
# Load the product-review data from the on-disk SFrame directory 'amazon_baby.gl/'
# (path is relative to the notebook's working directory).
products = graphlab.SFrame('amazon_baby.gl/')
In [3]:
# Preview the first rows to check what columns were loaded.
products.head()
Out[3]:
In [4]:
# Turn every review text into its word counts and store the result
# in a new 'word_count' column; this is the only model feature used below.
review_text = products['review']
products['word_count'] = graphlab.text_analytics.count_words(review_text)
In [5]:
# Preview again to confirm the new 'word_count' column is present.
products.head()
Out[5]:
In [8]:
# Route GraphLab Canvas visualizations (the .show() calls below) to the browser.
graphlab.canvas.set_target('browser')
In [9]:
# Visualize the 'name' column in Canvas to explore which products appear in the data.
products['name'].show()
In [10]:
# Keep only the rows whose product name is exactly the giraffe teether.
giraffe_name = "Vulli Sophie the Giraffe Teether"
is_giraffe = products['name'] == giraffe_name
giraffe_reviews = products[is_giraffe]
In [11]:
# Number of reviews found for the giraffe teether.
len(giraffe_reviews)
Out[11]:
In [13]:
# Rating distribution for the giraffe teether only.
giraffe_reviews['rating'].show(view='Categorical') # shows a sorted histogram
In [14]:
# Rating distribution across all products.
products['rating'].show(view='Categorical') # shows counts and %'s for each rating
In [15]:
# Ignore all 3-star reviews: only ratings other than 3 are kept,
# and `products` is rebound to the filtered frame.
not_three_star = products['rating'] != 3
products = products[not_three_star]
In [17]:
# Label each remaining review: positive sentiment is a 4-star or 5-star rating.
# With 3-star reviews already removed, everything else is the negative class.
is_positive = products['rating'] >= 4
products['sentiment'] = is_positive
In [18]:
# Preview to confirm the new 'sentiment' column is present.
products.head()
Out[18]:
In [19]:
# Randomly split the labelled reviews: a .8 fraction goes to training, the rest to testing.
# The fixed seed keeps the split the same on every run.
split_parts = products.random_split(.8, seed=0)
train_data, test_data = split_parts
In [20]:
# Train a logistic-regression sentiment classifier on the word counts.
# NOTE: test_data is also passed as the validation set, so the validation
# numbers reported during training are computed on the same rows that are
# evaluated later.
classifier_options = dict(
    target='sentiment',
    features=['word_count'],
    validation_set=test_data,
)
sentiment_model = graphlab.logistic_classifier.create(train_data, **classifier_options)
In [26]:
# Evaluate the classifier on test_data, requesting only the ROC-curve metric.
sentiment_model.evaluate(test_data, metric='roc_curve')
Out[26]:
In [27]:
'''
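roc_curve = receiver operating characteristic (ROC) curve: tpr plotted against fpr as the decision threshold varies (this is not a precision-recall curve)
fpr = false positive rate = FP / (FP + TN)
tpr = true positive rate = TP / (TP + FN)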
p = positive
n = negative
'''
Out[27]:
In [28]:
# Open the model's 'Evaluation' view in Canvas.
sentiment_model.show(view='Evaluation')
In [29]:
# Score every giraffe review with the trained model. output_type='probability'
# asks for a probability rather than a hard class label.
giraffe_probabilities = sentiment_model.predict(giraffe_reviews, output_type='probability')
giraffe_reviews['predicted_sentiment'] = giraffe_probabilities
In [36]:
# Preview to confirm the new 'predicted_sentiment' column is present.
giraffe_reviews.head()
Out[36]:
In [39]:
# Order the giraffe reviews from highest to lowest predicted sentiment,
# so the top rows are the most positive and the bottom rows the most negative.
sort_column = 'predicted_sentiment'
giraffe_reviews = giraffe_reviews.sort(sort_column, ascending=False)
In [40]:
# Preview the top of the sorted frame (highest predicted sentiment first).
giraffe_reviews.head()
Out[40]:
In [41]:
# Text of the most positive review: first row after the descending sort.
giraffe_reviews[0]['review']
Out[41]:
In [43]:
# Text of the second most positive review.
giraffe_reviews[1]['review']
Out[43]:
In [44]:
# Text of the most negative review: last row after the descending sort.
giraffe_reviews[-1]['review']
Out[44]:
In [45]:
# Text of the second most negative review.
giraffe_reviews[-2]['review']
Out[45]:
In [ ]: